library(foreign)
library(plyr)
library(xtable)
library(arm)	
library(Amelia)	
library(gmodels)

load('imputed_data.Rdata')
source('colnames2labels.R')

uniq.panel <- factor(appeals$uniq.panel)
appeals <- appeals[,-which(colnames(appeals) %in% c('uniq.panel','court_TLV'))]


#### CODE TO CLUSTER STANDARD ERRORS ####
###Function to compute clustered standard errors
clse.f.vcov <- function(dat,fm, cluster){
	require(sandwich)
	require(lmtest)
	not <- attr(fm$model,"na.action")
	if( ! is.null(not)){
		cluster <- cluster[-not]
		dat <- dat[-not,]
	}
	with(dat,{
	M <- length(unique(cluster))
	N <- length(cluster)	
	K <- fm$rank
	dfc <- (M/(M-1))*((N-1)/(N-K))
	uj <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
	vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
	return(list('coefficients' = coeftest(fm, vcovCL), 'vcov' = vcovCL))
	}
	)
}	


n.impute <- 5

####DEFINE OUTCOME#####
outcome.stub <- 'Prison_term'
outcome <- paste(outcome.stub,'D',sep = '_')
outcome.past <- paste(outcome.stub,'M',sep = '_')
form.red <- as.formula(paste(outcome, '~ narabs_J*accused_arab +',outcome.past))

##STRIP OUT ZERO VALUES of outcome
for(i in c(1:n.impute)){
	zero.vals <- a.out$imputations[[i]][,which(colnames(a.out$imputations[[i]]) == outcome)] == 0
	a.out$imputations[[i]] <- a.out$imputations[[i]][-which(zero.vals),]
	if(i == 1) uniq.panel <- factor(uniq.panel[-which(zero.vals)])
}
n.model <- 4
################################# FIT MODEL ##################################
full.model.vars <- list(
c('court_Nazareth', 'court_Jerusalem','accused_female','criminal_record'),#Model 1: Basic model - just treatment, court effect, and accused covariates
#Model 2: Additional accused covariates + judge covariates
c('court_Nazareth', 'court_Jerusalem','accused_female','criminal_record', 'avg_jage','avg_jexp','female_j'),
#Model 3: Additional accused covariates + judge covariates + offense covariates
c('court_Nazareth', 'court_Jerusalem','accused_female','criminal_record', 'avg_jage','avg_jexp','female_j','offense1','offense2','offense3','offense4','victimRace2'),
#Model 4: Additional accused covariates + judge covariates + offense covariates + court process covariates
 c('court_Nazareth', 'court_Jerusalem','accused_female','criminal_record', 'avg_jage','avg_jexp','female_j','offense1','offense2','offense3','offense4','victimRace2','prequest2','methodGuilty','methodTrial'))


my.model.tables <- list()
aic.vals <- vector(length = n.model) 
se.diffs <- list() #DIAGNOSTICS
for(j in c(1:length(full.model.vars))){
	vars.of.interest <- full.model.vars[[j]]
	form.full <- as.formula(paste(outcome, '~ narabs_J*accused_arab +',outcome.past, '+', paste(vars.of.interest, 	collapse='+')))
	lm.final <- llply(a.out$imputations, function(dat){lm(form.full, data = dat)})	
	#function to get covariance of interaction terms
	extract.cov <- function(my.cov){
		my.cov[which(rownames(my.cov) == 'narabs_J'),
		which(colnames(my.cov) == 'narabs_J:accused_arab')]
	}
	#pull needed estimates out of model
	lm.tables <- list()
	se.diffs[[j]] <- matrix(nrow = length(lm.final[[1]]$coef), ncol = n.impute) #DIAGNOSTICS
 	for(i in c(1:n.impute)){
		#clustered errors
		cluster.robust <- clse.f.vcov(a.out$imputations[[i]], lm.final[[i]], uniq.panel)
		coef.table <- cluster.robust$coefficients
		se.diffs[[j]][,i] <- summary(lm.final[[i]])$coefficients[,2] - coef.table[,2] #DIAGNOSTICS	
		#add AIC info
		aic.table <- cbind(c( extractAIC(lm.final[[i]]), length(lm.final[[i]]$fitted.values) ), matrix(NA, nrow = 3, ncol = 3))	
		
		rownames(aic.table) <- c('N. Predictors','AIC Value','Observations')
		
		#add marginal effect for mixed panel with arab accused
		coeff.mix <- coef.table[which(rownames(coef.table) == 'narabs_J'),]
		coeff.mix.arab <- coef.table[which(rownames(coef.table) == 'narabs_J:accused_arab'),]
		marg.eff.arab <- c(coeff.mix[1] + coeff.mix.arab[1], 
			sqrt(coeff.mix[2]^2 + coeff.mix.arab[2]^2 + 2*extract.cov(cluster.robust$vcov)),
			NA,NA) #t-stat and p-value are not meaningful for marginal effect 
		marg.eff.jew <- coeff.mix
		lm.tables[[i]] <- rbind(coef.table, aic.table, marg.eff.jew, marg.eff.arab)
	}

	#combine amelia estimates
	coeffs.mat <- laply(lm.tables, function(x){x[,1]})
	stderr.mat <- laply(lm.tables, function(x){x[,2]})
	regr.df <- lm.final[[i]]$df.residual #save degrees of freedom for use in computing p-values later
	meld.out <- mi.meld(coeffs.mat, stderr.mat)
	#lm1 <- mi.meld(est.mat, stderr.mat)

	final.table <- as.data.frame(cbind(t(meld.out$q.mi), t(meld.out$se.mi)))

	colnames(final.table) <- c('Estimate','Std.Error')
	final.table$t.value <- final.table$Estimate/final.table$Std.Error
	final.table$p.value <- 2 - 2*pt(abs(final.table$t.value), df = regr.df)
	nr <- nrow(final.table)
	my.model.tables[[j]] <- final.table
}

table.sections <- llply(my.model.tables, function(x){x[,c(1,2,4)]})
all.rownames <- unique(unlist(llply(table.sections, rownames)))
colname.stubs <- c('Est','SE','Pval')
all.colnames <- paste(colname.stubs, as.vector(t(matrix(rep(c(1:n.model),3), ncol = 3))), sep = '_')
models.tab <- matrix(0, ncol = length(all.colnames), nrow = length(all.rownames), dimnames = list(all.rownames, all.colnames))

expand.tab <- function(source.tab,target.names){
	row.idx <- match(rownames(source.tab), target.names)
	out.mat <- matrix(NA,ncol = ncol(source.tab), nrow = length(target.names))
	out.mat[row.idx,] <- as.matrix(source.tab)
	return(out.mat)
}
for(i in c(1:length(table.sections))){
	models.tab[,c(3*i-2,3*i - 1, 3*i)] <- expand.tab(table.sections[[i]],rownames(models.tab))
}

var.counts <- laply(table.sections, nrow)
aic.info <- rbind(var.counts - 3, aic.vals) #subtract intercept, marg.effects
rownames(aic.info) <- c('Number of Predictors','AIC Value')
colnames(aic.info) <- paste('Model', c(1:n.model))

################################# MAKE TABLE 12 ##################################
#build marginal effects table
marg.effect.tab <- rbind(
	models.tab[which(rownames(models.tab) == 'marg.eff.jew'),],
	models.tab[which(rownames(models.tab) == 'marg.eff.arab'),])
marg.effect.tab <- marg.effect.tab[,-3*(c(1:n.model))] #drop p.vals
rownames(marg.effect.tab) <- c('Marginal Effect for Jewish Accused', 'Marginal Effect for Arab Accused')


#clean up main table
latex.tab <- round(models.tab,3)
#drop marginal effect row
#latex.tab <- latex.tab[-which(rownames(latex.tab) %in% c('marg.eff.jew','marg.eff.arab')),]
#rearrange rows so interesting coefficients are at top
aa.idx <- which(rownames(latex.tab) == 'accused_arab')
nJ.idx <- which(rownames(latex.tab) == 'narabs_J')
nJ.aa.idx <- which(rownames(latex.tab) == 'narabs_J:accused_arab')
int.idx <- which(rownames(latex.tab) == '(Intercept)')
n.pred.idx <- which(rownames(latex.tab) == 'N. Predictors')
aic.idx <- which(rownames(latex.tab) == 'AIC Value')
obs.idx <- which(rownames(latex.tab) == 'Observations')
mj.idx <- which(rownames(latex.tab) == 'marg.eff.jew')
ma.idx <- which(rownames(latex.tab) == 'marg.eff.arab')
new.order <- c(aa.idx,nJ.idx, nJ.aa.idx, c(1:nrow(latex.tab))[-c(nJ.idx, nJ.aa.idx, aa.idx, int.idx, n.pred.idx, aic.idx, obs.idx, mj.idx, ma.idx)], int.idx, n.pred.idx, aic.idx, obs.idx, mj.idx, ma.idx)
latex.tab <- latex.tab[new.order,]
latex.tab[is.na(latex.tab)] <- ''

#add parentheses around standard errors
stderr.idx <- grep('SE', x = colnames(latex.tab))
latex.tab[,stderr.idx] <- apply(latex.tab[,stderr.idx], 2, function(x){paste('(',x,')',sep = '')})

latex.tab[latex.tab == '()'] <- ''

print("TABLE 12")
print(xtable(names2labels(latex.tab, col.or.row = 'row')))

################################# MAKE FIGURE 2 (second part) ##################################

#graph marginal effects with their confints
stderr.idx <- 2*c(1:(ncol(marg.effect.tab)/2))
eff.idx <- stderr.idx - 1

N <- ncol(marg.effect.tab)/2
m.effects <- c(marg.effect.tab[1,eff.idx], marg.effect.tab[2,eff.idx])
discrep <- c(marg.effect.tab[1,stderr.idx], marg.effect.tab[2, stderr.idx])*1.96
m.effects.up <- m.effects + discrep
m.effects.dwn <- m.effects - discrep

ylims <- c( min(-1, min(m.effects.dwn, na.rm = TRUE)), max(1, max(m.effects.up, na.rm = TRUE)))

#plot effects
plot(rep(c(1:N),2) + c(rep(-0.1,N),rep(0.1,N)), m.effects, ylim = ylims, main =paste('Prison Term'),
	 xlab = 'Model Number', ylab = 'Marginal Effect (months)',
	pch = 20, xaxt = 'n')
axis(1, at = 1:4)
#plot confidence intervals
for(i in rep(c(1:N))){
	lines(rep(i,2) - 0.1,c(m.effects.up[i], m.effects.dwn[i]), lty = 'dotted')
	lines(rep(i,2) + 0.1,c(m.effects.up[i + N], m.effects.dwn[i + N]), lty = 'solid')
}
#extra plot detail
abline(h = 0, lty = 'dotted')
legend('topleft', c('Jewish Accused','Arab Accused'), lty = c('dotted','solid'))
